import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Load the population-density CSV export into a DataFrame.
population_density = pd.read_csv(
    filepath_or_buffer='F:\\STUDIA\\7SEM\\mit\\zad2\\dane\\population_density.csv',
)
# Preview the first five rows.
population_density.head(n=5)
| | DATAFLOW | LAST UPDATE | freq | unit | geo | TIME_PERIOD | OBS_VALUE | OBS_FLAG |
|---|---|---|---|---|---|---|---|---|
| 0 | ESTAT:TPS00003(1.0) | 20/08/21 23:00:00 | A | PER_KM2 | AL | 2012 | 100.7 | NaN |
| 1 | ESTAT:TPS00003(1.0) | 20/08/21 23:00:00 | A | PER_KM2 | AL | 2013 | 100.6 | NaN |
| 2 | ESTAT:TPS00003(1.0) | 20/08/21 23:00:00 | A | PER_KM2 | AL | 2014 | 100.4 | NaN |
| 3 | ESTAT:TPS00003(1.0) | 20/08/21 23:00:00 | A | PER_KM2 | AL | 2015 | 100.1 | NaN |
| 4 | ESTAT:TPS00003(1.0) | 20/08/21 23:00:00 | A | PER_KM2 | AL | 2016 | 99.9 | NaN |
# Discard the metadata columns; only geo, TIME_PERIOD and OBS_VALUE remain.
population_density = population_density.drop(
    ['DATAFLOW', 'LAST UPDATE', 'freq', 'unit', 'OBS_FLAG'],
    axis='columns',
)
population_density.head(n=5)
| | geo | TIME_PERIOD | OBS_VALUE |
|---|---|---|---|
| 0 | AL | 2012 | 100.7 |
| 1 | AL | 2013 | 100.6 |
| 2 | AL | 2014 | 100.4 |
| 3 | AL | 2015 | 100.1 |
| 4 | AL | 2016 | 99.9 |
# Print column dtypes and non-null counts to spot missing values.
population_density.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 464 entries, 0 to 463 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 geo 464 non-null object 1 TIME_PERIOD 464 non-null int64 2 OBS_VALUE 464 non-null float64 dtypes: float64(1), int64(1), object(1) memory usage: 11.0+ KB
# Load the women-per-100-men CSV export into a DataFrame.
women_per_100_men = pd.read_csv(
    filepath_or_buffer='F:\\STUDIA\\7SEM\\mit\\zad2\\dane\\women_per_100_men.csv',
)
# Preview the first five rows.
women_per_100_men.head(n=5)
| | DATAFLOW | LAST UPDATE | freq | indic_de | geo | TIME_PERIOD | OBS_VALUE | OBS_FLAG |
|---|---|---|---|---|---|---|---|---|
| 0 | ESTAT:TPS00011(1.0) | 22/06/22 23:00:00 | A | PC_FM | AD | 2010 | 92.0 | NaN |
| 1 | ESTAT:TPS00011(1.0) | 22/06/22 23:00:00 | A | PC_FM | AD | 2011 | 96.0 | NaN |
| 2 | ESTAT:TPS00011(1.0) | 22/06/22 23:00:00 | A | PC_FM | AD | 2012 | 96.0 | NaN |
| 3 | ESTAT:TPS00011(1.0) | 22/06/22 23:00:00 | A | PC_FM | AD | 2013 | 96.3 | NaN |
| 4 | ESTAT:TPS00011(1.0) | 22/06/22 23:00:00 | A | PC_FM | AD | 2019 | 96.4 | NaN |
# Print column dtypes and non-null counts to spot missing values.
women_per_100_men.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 593 entries, 0 to 592 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 DATAFLOW 593 non-null object 1 LAST UPDATE 593 non-null object 2 freq 593 non-null object 3 indic_de 593 non-null object 4 geo 593 non-null object 5 TIME_PERIOD 593 non-null int64 6 OBS_VALUE 592 non-null float64 7 OBS_FLAG 66 non-null object dtypes: float64(1), int64(1), object(6) memory usage: 37.2+ KB
# Discard the metadata columns; only geo, TIME_PERIOD and OBS_VALUE remain.
women_per_100_men = women_per_100_men.drop(
    ['DATAFLOW', 'LAST UPDATE', 'freq', 'indic_de', 'OBS_FLAG'],
    axis='columns',
)
women_per_100_men.head(n=5)
| | geo | TIME_PERIOD | OBS_VALUE |
|---|---|---|---|
| 0 | AD | 2010 | 92.0 |
| 1 | AD | 2011 | 96.0 |
| 2 | AD | 2012 | 96.0 |
| 3 | AD | 2013 | 96.3 |
| 4 | AD | 2019 | 96.4 |
# Load the old-age-dependency-ratio CSV export into a DataFrame.
old_age_dependency_ratio = pd.read_csv(
    filepath_or_buffer='F:\\STUDIA\\7SEM\\mit\\zad2\\dane\\old_age_dependency_ratio.csv',
)
# Preview the first five rows.
old_age_dependency_ratio.head(n=5)
| | DATAFLOW | LAST UPDATE | freq | indic_de | geo | TIME_PERIOD | OBS_VALUE | OBS_FLAG |
|---|---|---|---|---|---|---|---|---|
| 0 | ESTAT:TPS00198(1.0) | 22/06/22 23:00:00 | A | OLDDEP1 | AD | 2010 | 17.7 | NaN |
| 1 | ESTAT:TPS00198(1.0) | 22/06/22 23:00:00 | A | OLDDEP1 | AD | 2011 | 17.6 | NaN |
| 2 | ESTAT:TPS00198(1.0) | 22/06/22 23:00:00 | A | OLDDEP1 | AD | 2012 | 17.6 | NaN |
| 3 | ESTAT:TPS00198(1.0) | 22/06/22 23:00:00 | A | OLDDEP1 | AD | 2013 | 17.5 | NaN |
| 4 | ESTAT:TPS00198(1.0) | 22/06/22 23:00:00 | A | OLDDEP1 | AD | 2019 | 18.7 | NaN |
# Discard the metadata columns; only geo, TIME_PERIOD and OBS_VALUE remain.
old_age_dependency_ratio = old_age_dependency_ratio.drop(
    ['DATAFLOW', 'LAST UPDATE', 'freq', 'indic_de', 'OBS_FLAG'],
    axis='columns',
)
old_age_dependency_ratio.head(n=5)
| | geo | TIME_PERIOD | OBS_VALUE |
|---|---|---|---|
| 0 | AD | 2010 | 17.7 |
| 1 | AD | 2011 | 17.6 |
| 2 | AD | 2012 | 17.6 |
| 3 | AD | 2013 | 17.5 |
| 4 | AD | 2019 | 18.7 |
# Print column dtypes and non-null counts to spot missing values.
old_age_dependency_ratio.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 579 entries, 0 to 578 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 geo 579 non-null object 1 TIME_PERIOD 579 non-null int64 2 OBS_VALUE 578 non-null float64 dtypes: float64(1), int64(1), object(1) memory usage: 13.7+ KB
# Load the share-of-population-aged-65+ CSV export into a DataFrame.
proportion_of_population_aged_65_and_over = pd.read_csv(
    filepath_or_buffer='F:\\STUDIA\\7SEM\\mit\\zad2\\dane\\proportion_of_population_aged_65_and_over.csv',
)
# Preview the first five rows.
proportion_of_population_aged_65_and_over.head(n=5)
| | DATAFLOW | LAST UPDATE | freq | indic_de | geo | TIME_PERIOD | OBS_VALUE | OBS_FLAG |
|---|---|---|---|---|---|---|---|---|
| 0 | ESTAT:TPS00028(1.0) | 22/06/22 23:00:00 | A | PC_Y65_MAX | AD | 2010 | 12.9 | NaN |
| 1 | ESTAT:TPS00028(1.0) | 22/06/22 23:00:00 | A | PC_Y65_MAX | AD | 2011 | 12.6 | NaN |
| 2 | ESTAT:TPS00028(1.0) | 22/06/22 23:00:00 | A | PC_Y65_MAX | AD | 2012 | 12.6 | NaN |
| 3 | ESTAT:TPS00028(1.0) | 22/06/22 23:00:00 | A | PC_Y65_MAX | AD | 2013 | 12.6 | NaN |
| 4 | ESTAT:TPS00028(1.0) | 22/06/22 23:00:00 | A | PC_Y65_MAX | AD | 2019 | 13.6 | NaN |
# Discard the metadata columns; only geo, TIME_PERIOD and OBS_VALUE remain.
proportion_of_population_aged_65_and_over = proportion_of_population_aged_65_and_over.drop(
    ['DATAFLOW', 'LAST UPDATE', 'freq', 'indic_de', 'OBS_FLAG'],
    axis='columns',
)
proportion_of_population_aged_65_and_over.head(n=5)
| | geo | TIME_PERIOD | OBS_VALUE |
|---|---|---|---|
| 0 | AD | 2010 | 12.9 |
| 1 | AD | 2011 | 12.6 |
| 2 | AD | 2012 | 12.6 |
| 3 | AD | 2013 | 12.6 |
| 4 | AD | 2019 | 13.6 |
# Print column dtypes and non-null counts to spot missing values.
proportion_of_population_aged_65_and_over.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 579 entries, 0 to 578 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 geo 579 non-null object 1 TIME_PERIOD 579 non-null int64 2 OBS_VALUE 578 non-null float64 dtypes: float64(1), int64(1), object(1) memory usage: 13.7+ KB
# Load the population-by-age-group CSV export into a DataFrame.
population_by_age_group = pd.read_csv(
    filepath_or_buffer='F:\\STUDIA\\7SEM\\mit\\zad2\\dane\\population_by_age_group.csv',
)
# Preview the first five rows.
population_by_age_group.head(n=5)
| | DATAFLOW | LAST UPDATE | freq | indic_de | geo | TIME_PERIOD | OBS_VALUE | OBS_FLAG |
|---|---|---|---|---|---|---|---|---|
| 0 | ESTAT:TPS00010(1.0) | 22/06/22 23:00:00 | A | PC_Y0_14 | AD | 2010 | 14.5 | NaN |
| 1 | ESTAT:TPS00010(1.0) | 22/06/22 23:00:00 | A | PC_Y0_14 | AD | 2011 | 15.5 | NaN |
| 2 | ESTAT:TPS00010(1.0) | 22/06/22 23:00:00 | A | PC_Y0_14 | AD | 2012 | 15.5 | NaN |
| 3 | ESTAT:TPS00010(1.0) | 22/06/22 23:00:00 | A | PC_Y0_14 | AD | 2013 | 15.5 | NaN |
| 4 | ESTAT:TPS00010(1.0) | 22/06/22 23:00:00 | A | PC_Y0_14 | AD | 2019 | 13.9 | NaN |
# Discard the metadata columns. `indic_de` is kept here because it
# distinguishes the age groups within this dataset.
population_by_age_group = population_by_age_group.drop(
    ['DATAFLOW', 'LAST UPDATE', 'freq', 'OBS_FLAG'],
    axis='columns',
)
population_by_age_group.head(n=5)
| | indic_de | geo | TIME_PERIOD | OBS_VALUE |
|---|---|---|---|---|
| 0 | PC_Y0_14 | AD | 2010 | 14.5 |
| 1 | PC_Y0_14 | AD | 2011 | 15.5 |
| 2 | PC_Y0_14 | AD | 2012 | 15.5 |
| 3 | PC_Y0_14 | AD | 2013 | 15.5 |
| 4 | PC_Y0_14 | AD | 2019 | 13.9 |
# Print column dtypes and non-null counts to spot missing values.
population_by_age_group.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3474 entries, 0 to 3473 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 indic_de 3474 non-null object 1 geo 3474 non-null object 2 TIME_PERIOD 3474 non-null int64 3 OBS_VALUE 3468 non-null float64 dtypes: float64(1), int64(1), object(2) memory usage: 108.7+ KB
# Load the population-change CSV export into a DataFrame.
population_change = pd.read_csv(
    filepath_or_buffer='F:\\STUDIA\\7SEM\\mit\\zad2\\dane\\population_change.csv',
)
# Preview the first five rows.
population_change.head(n=5)
| | DATAFLOW | LAST UPDATE | freq | indic_de | geo | TIME_PERIOD | OBS_VALUE | OBS_FLAG |
|---|---|---|---|---|---|---|---|---|
| 0 | ESTAT:TPS00019(1.0) | 11/07/22 11:00:00 | A | CNMIGRATRT | AD | 2010 | 4.1 | NaN |
| 1 | ESTAT:TPS00019(1.0) | 11/07/22 11:00:00 | A | CNMIGRATRT | AD | 2012 | -29.8 | NaN |
| 2 | ESTAT:TPS00019(1.0) | 11/07/22 11:00:00 | A | CNMIGRATRT | AD | 2016 | 14.5 | e |
| 3 | ESTAT:TPS00019(1.0) | 11/07/22 11:00:00 | A | CNMIGRATRT | AD | 2018 | 15.6 | e |
| 4 | ESTAT:TPS00019(1.0) | 11/07/22 11:00:00 | A | CNMIGRATRT | AD | 2019 | 14.7 | e |
# Discard the metadata columns. `indic_de` is kept here because it
# distinguishes the change indicators within this dataset.
population_change = population_change.drop(
    ['DATAFLOW', 'LAST UPDATE', 'freq', 'OBS_FLAG'],
    axis='columns',
)
population_change.head(n=5)
| | indic_de | geo | TIME_PERIOD | OBS_VALUE |
|---|---|---|---|---|
| 0 | CNMIGRATRT | AD | 2010 | 4.1 |
| 1 | CNMIGRATRT | AD | 2012 | -29.8 |
| 2 | CNMIGRATRT | AD | 2016 | 14.5 |
| 3 | CNMIGRATRT | AD | 2018 | 15.6 |
| 4 | CNMIGRATRT | AD | 2019 | 14.7 |
# Print column dtypes and non-null counts to spot missing values.
population_change.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1769 entries, 0 to 1768 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 indic_de 1769 non-null object 1 geo 1769 non-null object 2 TIME_PERIOD 1769 non-null int64 3 OBS_VALUE 1769 non-null float64 dtypes: float64(1), int64(1), object(2) memory usage: 55.4+ KB
# Load the population-as-percentage CSV export into a DataFrame.
population_as_percentage = pd.read_csv(
    filepath_or_buffer='F:\\STUDIA\\7SEM\\mit\\zad2\\dane\\population_as_percentage.csv',
)
# Preview the first five rows.
population_as_percentage.head(n=5)
| | DATAFLOW | LAST UPDATE | freq | indic_de | geo | TIME_PERIOD | OBS_VALUE | OBS_FLAG |
|---|---|---|---|---|---|---|---|---|
| 0 | ESTAT:TPS00005(1.0) | 11/07/22 11:00:00 | A | POPSHARE_EU27_2020 | AT | 2011 | 1.9 | NaN |
| 1 | ESTAT:TPS00005(1.0) | 11/07/22 11:00:00 | A | POPSHARE_EU27_2020 | AT | 2012 | 1.9 | NaN |
| 2 | ESTAT:TPS00005(1.0) | 11/07/22 11:00:00 | A | POPSHARE_EU27_2020 | AT | 2013 | 1.9 | NaN |
| 3 | ESTAT:TPS00005(1.0) | 11/07/22 11:00:00 | A | POPSHARE_EU27_2020 | AT | 2014 | 1.9 | NaN |
| 4 | ESTAT:TPS00005(1.0) | 11/07/22 11:00:00 | A | POPSHARE_EU27_2020 | AT | 2015 | 1.9 | NaN |
# Discard the metadata columns; only geo, TIME_PERIOD and OBS_VALUE remain.
population_as_percentage = population_as_percentage.drop(
    ['DATAFLOW', 'LAST UPDATE', 'freq', 'indic_de', 'OBS_FLAG'],
    axis='columns',
)
population_as_percentage.head(n=5)
| | geo | TIME_PERIOD | OBS_VALUE |
|---|---|---|---|
| 0 | AT | 2011 | 1.9 |
| 1 | AT | 2012 | 1.9 |
| 2 | AT | 2013 | 1.9 |
| 3 | AT | 2014 | 1.9 |
| 4 | AT | 2015 | 1.9 |
# Print column dtypes and non-null counts to spot missing values.
population_as_percentage.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 336 entries, 0 to 335 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 geo 336 non-null object 1 TIME_PERIOD 336 non-null int64 2 OBS_VALUE 336 non-null float64 dtypes: float64(1), int64(1), object(1) memory usage: 8.0+ KB
# Figure 1: population share per year for four selected countries, drawn as
# grouped bars with a line of the same colour on top of each series.
population_as_percentage_PL = population_as_percentage[population_as_percentage['geo'] == 'PL']
population_as_percentage_DE = population_as_percentage[population_as_percentage['geo'] == 'DE']
population_as_percentage_FR = population_as_percentage[population_as_percentage['geo'] == 'FR']
# 'HR' is the geo code of Croatia. The frame is no longer plotted but is kept
# so that any other cell that references it keeps working.
population_as_percentage_HR = population_as_percentage[population_as_percentage['geo'] == 'HR']
# The fourth series is labelled 'Węgry' (Hungary, ~93 000 km^2), whose geo code
# is 'HU'. Previously the Croatian rows were plotted under the Hungarian label.
population_as_percentage_HU = population_as_percentage[population_as_percentage['geo'] == 'HU']

# (rows, colour, legend label of the bar trace, legend label of the line trace)
fig1_series = (
    (population_as_percentage_PL, 'black', 'Polska (322 575 km^2)', 'Polska'),
    (population_as_percentage_DE, 'red', 'Niemcy (357 578 km^2)', 'Niemcy'),
    (population_as_percentage_FR, 'gold', 'Francja (543 940 km^2)', 'Francja'),
    (population_as_percentage_HU, 'blue', 'Węgry (93 025 km^2)', 'Węgry'),
)

fig1 = go.Figure()
# All bar traces are added before the line traces so the trace (and legend)
# order is the same as when each trace was added by hand.
for rows, colour, bar_label, _ in fig1_series:
    fig1.add_trace(go.Bar(x = rows['TIME_PERIOD'], y = rows['OBS_VALUE'], marker_color = colour, name = bar_label, opacity = 0.7))
for rows, colour, _, line_label in fig1_series:
    fig1.add_trace(go.Scatter(x = rows['TIME_PERIOD'], y = rows['OBS_VALUE'], mode = 'lines', marker_color = colour, name = line_label))
fig1.update_layout(title_text='Procentowa populacja krajów Unii europejskiej')
fig1.show()
# Figure 2: 3D scatter of population share by age group (y) over time (x)
# for four selected countries plus the EU28 aggregate.
population_by_age_group_PL = population_by_age_group[population_by_age_group['geo'] == 'PL']
population_by_age_group_DE = population_by_age_group[population_by_age_group['geo'] == 'DE']
population_by_age_group_FR = population_by_age_group[population_by_age_group['geo'] == 'FR']
# 'HR' is the geo code of Croatia. The frame is no longer plotted but is kept
# so that any other cell that references it keeps working.
population_by_age_group_HR = population_by_age_group[population_by_age_group['geo'] == 'HR']
# The fourth series is labelled 'Węgry' (Hungary, ~93 000 km^2), whose geo code
# is 'HU'. Previously the Croatian rows were plotted under the Hungarian label.
population_by_age_group_HU = population_by_age_group[population_by_age_group['geo'] == 'HU']
population_by_age_group_EU28 = population_by_age_group[population_by_age_group['geo'] == 'EU28']

# (rows, marker colour, legend label)
fig2_series = (
    (population_by_age_group_PL, 'black', 'Polska (322 575 km^2)'),
    (population_by_age_group_DE, 'red', 'Niemcy (357 578 km^2)'),
    (population_by_age_group_FR, 'gold', 'Francja (543 940 km^2)'),
    (population_by_age_group_HU, 'blue', 'Węgry (93 025 km^2)'),
    (population_by_age_group_EU28, 'pink', 'EU (28 krajów)'),
)

fig2 = go.Figure()
for rows, colour, label in fig2_series:
    fig2.add_trace(go.Scatter3d(x = rows['TIME_PERIOD'], y = rows['indic_de'], z = rows['OBS_VALUE'], mode = 'markers', marker_color = colour, name = label))
# Fixed canvas size, blank 3D axis titles and the figure title in one call.
fig2.update_layout(
    autosize = False,
    width = 900,
    height = 1200,
    scene = dict(xaxis_title = '', yaxis_title = '', zaxis_title = ''),
    title = "Populacja według grup wiekowych",
)
fig2.show()
The crude rate of total change is the ratio of the population change during the year (the difference between the population sizes on 1 January of two consecutive years) to the average population in that year. The value is expressed per 1 000 persons. The crude rate of natural change is the ratio of the natural change during the year (live births minus deaths) to the average population in that year. The value is expressed per 1 000 persons. The crude rate of net migration plus adjustment is defined as the ratio of net migration (including statistical adjustment) during the year to the average population in that year. The value is expressed per 1 000 persons. The net migration plus adjustment is calculated as the difference between the total change and the natural change of the population.
# Indicator codes used in the `indic_de` column:
# CNMIGRATRT - Crude rate of net migration plus statistical adjustment
# GROWRT - Crude rate of total population change
# NATGROWRT - Crude rate of natural change of population
population_change_PL2 = population_change[population_change['geo'] == 'PL']
population_change_DE2 = population_change[population_change['geo'] == 'DE']


def _indicator_line_traces(rows, country_code, colour):
    """Return one scatter3d line trace (as a plain dict) per indicator.

    Plotly's `groupby` transform is deprecated and unavailable in newer
    releases, so the per-indicator split is done here with pandas instead.

    Args:
        rows: DataFrame with `indic_de`, `TIME_PERIOD` and `OBS_VALUE` columns.
        country_code: Short country label appended to every trace name.
        colour: Line colour shared by all traces of this country.

    Returns:
        A list of trace dicts, one per distinct `indic_de` value, in order of
        first appearance; each is named '<indicator> (<country_code>)'.
    """
    return [
        dict(
            type = 'scatter3d',
            x = group['TIME_PERIOD'],
            y = group['indic_de'],
            z = group['OBS_VALUE'],
            mode = 'lines',
            name = f'{indicator} ({country_code})',
            # The traces are lines only, so the colour is set on the line itself.
            line = dict(color = colour),
        )
        for indicator, group in rows.groupby('indic_de', sort = False)
    ]


population_change_test = _indicator_line_traces(population_change_PL2, 'PL', 'black')
population_change_test2 = _indicator_line_traces(population_change_DE2, 'DE', 'red')
popul = population_change_test + population_change_test2
# `scene` must live under `layout`; at the top level of the figure dict it is
# ignored. The axis titles use the explicit nested form because magic-underscore
# keys such as `xaxis_title` are not expanded inside a raw dict.
fig_dict1 = dict(
    data = popul,
    layout = dict(scene = dict(
        xaxis = dict(title = dict(text = '')),
        yaxis = dict(title = dict(text = '')),
        zaxis = dict(title = dict(text = '')),
    )),
)
# Validation is left on (the default) so a malformed figure dict raises
# instead of silently rendering without the intended settings.
pio.show(fig_dict1)